#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))

Load preprocessed dataset (preprocessing code in 19_10_14_data_preprocessing.Rmd) and clustering (pipeline in 19_10_21_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
  mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
  left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
  mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
  mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`), 
         significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]

# Correct SFARI Scores
dataset$gene.score = genes_info$gene.score


SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)


Relation to external clinical traits


Quantifying module-trait associations


Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.

datTraits = datMeta %>% dplyr::select(Diagnosis, Region, Sex, Age, PMI, RNAExtractionBatch) %>%
            dplyr::rename('ExtractionBatch' = RNAExtractionBatch)

# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)

# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)

# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
  print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated')) 
}

rm(ME_object)

I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them

# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)


labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels =  gsub('ME','',rownames(moduleTraitCor)), 
               yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
               textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
               main = paste('Module-Trait relationships'))

diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
                           'MTcor' = moduleTraitCor[,'Diagnosis'],
                           'MTpval' = moduleTraitPvalue[,'Diagnosis'])

genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')

rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)


Studying the modules with the highest absolute correlation to Diagnosis


The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules

top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])

cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #73B000, #EE8045, #00B9E3, #84AD00
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')'))

table(plot_data$ImportantModules)
## 
## #00B9E3 #73B000 #84AD00 #EE8045  Others 
##    1733    1461     891     884   11178
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
         geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() + 
           ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)




Module Membership vs Gene Significance


create_plot = function(module){
  
  plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
  colnames(plot_data)[2] = 'Module'
  
  SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
  SFARI_colors = SFARI_colors[!is.na(SFARI_colors)]
  
  p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
               scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,5,6))) + theme_minimal() + xlab('Module Membership') +
               ggtitle(paste0('Module ', module,'  (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
  
  return(p)
}

create_plot(top_modules[1])
create_plot(top_modules[2])
create_plot(top_modules[3])
create_plot(top_modules[4])
rm(create_plot)




SFARI Genes


List of top SFARI Genes in top modules ordered by SFARI score and Gene Significance

table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
             dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
             dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id, 
                    'SFARI score'=gene.score, 'Gene Significance'=GS)

kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[1]))
Top SFARI Genes for Module #73B000
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000158321 AUTS2 0.5297062 1
ENSG00000132510 KDM6B 0.3312280 1
ENSG00000165186 PTCHD1 0.3027356 1
ENSG00000151067 CACNA1C 0.2124455 1
ENSG00000108001 EBF3 0.1835242 1
ENSG00000168036 CTNNB1 0.1598633 1
ENSG00000189056 RELN -0.1016272 1
ENSG00000083168 KAT6A 0.5999494 2
ENSG00000205581 HMGN1 0.5655605 2
ENSG00000259207 ITGB3 0.4676222 2
ENSG00000169432 SCN9A 0.4140399 2
ENSG00000149571 KIRREL3 0.3970895 2
ENSG00000113742 CPEB4 0.3724287 2
ENSG00000196839 ADA 0.3686762 2
ENSG00000146938 NLGN4X 0.2547162 2
ENSG00000118432 CNR1 0.2248206 2
ENSG00000166148 AVPR1A 0.1138557 2
ENSG00000044090 CUL7 0.0391425 2
ENSG00000151623 NR3C2 -0.0150334 2
ENSG00000165097 KDM1B 0.8228002 3
ENSG00000106366 SERPINE1 0.7911667 3
ENSG00000011422 PLAUR 0.7765401 3
ENSG00000152208 GRID2 0.6205906 3
ENSG00000073756 PTGS2 0.5256987 3
ENSG00000137713 PPP2R1B 0.5152266 3
ENSG00000181744 C3orf58 0.5078650 3
ENSG00000234745 HLA-B 0.4927415 3
ENSG00000157554 ERG 0.4900669 3
ENSG00000068024 HDAC4 0.4764332 3
ENSG00000182372 CLN8 0.3979418 3
ENSG00000070371 CLTCL1 0.3914968 3
ENSG00000170836 PPM1D 0.3912223 3
ENSG00000071246 VASH1 0.3805608 3
ENSG00000196277 GRM7 0.3762334 3
ENSG00000251664 PCDHA12 0.3677659 3
ENSG00000070366 SMG6 0.3602051 3
ENSG00000156564 LRFN2 0.3558337 3
ENSG00000215045 GRID2IP 0.2860911 3
ENSG00000158089 GALNT14 0.2811434 3
ENSG00000184903 IMMP2L 0.1975311 3
ENSG00000128833 MYO5C 0.1820315 3
ENSG00000173338 KCNK7 0.1804904 3
ENSG00000157890 MEGF11 0.1801030 3
ENSG00000156011 PSD3 0.1754519 3
ENSG00000184845 DRD1 0.1669913 3
ENSG00000130508 PXDN 0.1229290 3
ENSG00000187957 DNER 0.0572460 3
ENSG00000188277 C15orf62 0.0452293 3
ENSG00000187546 AGMO -0.0249874 3
ENSG00000127329 PTPRB -0.0195590 3
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[2]))
Top SFARI Genes for Module #EE8045
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000181090 EHMT1 0.7732158 1
ENSG00000110066 SUV420H1 0.6536202 1
ENSG00000038382 TRIO 0.6260413 1
ENSG00000148143 ZNF462 0.6188215 1
ENSG00000165671 NSD1 0.5413122 1
ENSG00000141431 ASXL3 0.5017802 1
ENSG00000145348 TBCK 0.2948037 1
ENSG00000153234 NR4A2 0.2464660 1
ENSG00000102974 CTCF 0.1270874 1
ENSG00000168769 TET2 0.8170356 2
ENSG00000162946 DISC1 0.6648966 2
ENSG00000141027 NCOR1 0.4643907 2
ENSG00000197724 PHF2 0.3656794 2
ENSG00000145020 AMT 0.3410208 2
ENSG00000204764 RANBP17 0.2998930 2
ENSG00000101004 NINL 0.2768068 2
ENSG00000130940 CASZ1 0.2636101 2
ENSG00000198218 QRICH1 0.2571314 2
ENSG00000128573 FOXP2 0.2493147 2
ENSG00000070047 PHRF1 0.2458754 2
ENSG00000112902 SEMA5A 0.2184377 2
ENSG00000165995 CACNB2 0.1700655 2
ENSG00000172780 RAB43 0.1656377 2
ENSG00000171988 JMJD1C 0.7508001 3
ENSG00000172554 SNTG2 0.7352646 3
ENSG00000184226 PCDH9 0.7208942 3
ENSG00000181481 RNF135 0.6744660 3
ENSG00000227184 EPPK1 0.5949514 3
ENSG00000188785 ZNF548 0.5799337 3
ENSG00000131042 LILRB2 0.4087052 3
ENSG00000131374 TBC1D5 0.4036662 3
ENSG00000179094 PER1 0.3936445 3
ENSG00000133216 EPHB2 0.3498022 3
ENSG00000239389 PCDHA13 0.3411571 3
ENSG00000196367 TRRAP 0.3398825 3
ENSG00000115556 PLCD4 0.3366430 3
ENSG00000007314 SCN4A 0.3324757 3
ENSG00000150275 PCDH15 0.3322272 3
ENSG00000109743 BST1 0.3278767 3
ENSG00000182472 CAPN12 0.3164282 3
ENSG00000224389 C4B 0.3072172 3
ENSG00000133665 DYDC2 0.2718275 3
ENSG00000169252 ADRB2 0.2100597 3
ENSG00000174332 GLIS1 0.2068267 3
ENSG00000178665 ZNF713 0.2014021 3
ENSG00000135999 EPC2 0.1758127 3
ENSG00000154957 ZNF18 0.1212498 3
ENSG00000196391 ZNF774 0.1120060 3
ENSG00000189283 FHIT 0.0671324 3
ENSG00000166736 HTR3A 0.0604890 3
kable(table_data %>% filter(Module == top_modules[3] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[3]))
Top SFARI Genes for Module #00B9E3
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000144285 SCN1A -0.8000233 1
ENSG00000196876 SCN8A -0.7887782 1
ENSG00000158445 KCNB1 -0.7188371 1
ENSG00000061676 NCKAP1 -0.6906433 1
ENSG00000021645 NRXN3 -0.6715730 1
ENSG00000136535 TBR1 -0.6536200 1
ENSG00000136531 SCN2A -0.5374597 1
ENSG00000145362 ANK2 -0.4367876 1
ENSG00000172915 NBEA -0.4147304 1
ENSG00000050030 KIAA2022 -0.4001380 1
ENSG00000119866 BCL11A -0.3844250 1
ENSG00000008086 CDKL5 -0.3642207 1
ENSG00000174775 HRAS -0.3233708 1
ENSG00000114861 FOXP1 0.2551455 1
ENSG00000198963 RORB -0.1988853 1
ENSG00000036257 CUL3 -0.1935419 1
ENSG00000134138 MEIS2 -0.0195493 1
ENSG00000010818 HIVEP2 0.0164513 1
ENSG00000074590 NUAK1 -0.8235832 2
ENSG00000170579 DLGAP1 -0.7789832 2
ENSG00000132294 EFR3A -0.7500147 2
ENSG00000078328 RBFOX1 -0.7311006 2
ENSG00000003147 ICA1 -0.7126337 2
ENSG00000197535 MYO5A -0.7116545 2
ENSG00000175497 DPP10 -0.7059507 2
ENSG00000174469 CNTNAP2 -0.7002690 2
ENSG00000182621 PLCB1 -0.6917670 2
ENSG00000166501 PRKCB -0.6151170 2
ENSG00000139726 DENR -0.5936537 2
ENSG00000142230 SAE1 -0.5499380 2
ENSG00000183454 GRIN2A -0.5288764 2
ENSG00000185345 PARK2 -0.4980373 2
ENSG00000164506 STXBP5 -0.4854943 2
ENSG00000182771 GRID1 -0.4477343 2
ENSG00000132024 CC2D1A -0.4158439 2
ENSG00000157445 CACNA2D3 -0.3393135 2
ENSG00000144619 CNTN4 -0.3109404 2
ENSG00000185008 ROBO2 -0.3078622 2
ENSG00000182256 GABRG3 -0.2878943 2
ENSG00000139174 PRICKLE1 -0.2154883 2
ENSG00000134115 CNTN6 -0.1945703 2
ENSG00000166206 GABRB3 -0.1793562 2
ENSG00000138411 HECW2 -0.1360240 2
ENSG00000120251 GRIA2 -0.1278103 2
ENSG00000149970 CNKSR2 0.0921242 2
ENSG00000140945 CDH13 0.0846523 2
ENSG00000166147 FBN1 -0.0830677 2
ENSG00000171723 GPHN -0.0744071 2
ENSG00000149972 CNTN5 -0.0486109 2
ENSG00000144406 UNC80 -0.8619629 3
ENSG00000132639 SNAP25 -0.8102863 3
ENSG00000163618 CADPS -0.8086036 3
ENSG00000035687 ADSS -0.7983791 3
ENSG00000159082 SYNJ1 -0.7857393 3
ENSG00000104093 DMXL2 -0.7626271 3
ENSG00000170113 NIPA1 -0.7171001 3
ENSG00000172260 NEGR1 -0.7076941 3
ENSG00000163399 ATP1A1 -0.6892969 3
ENSG00000115840 SLC25A12 -0.6828484 3
ENSG00000196090 PTPRT -0.6741994 3
ENSG00000130477 UNC13A -0.6664444 3
ENSG00000154263 ABCA10 -0.6424383 3
ENSG00000153575 TUBGCP5 -0.6291099 3
ENSG00000136928 GABBR2 -0.6249389 3
ENSG00000144290 SLC4A10 -0.5758631 3
ENSG00000186094 AGBL4 -0.5383835 3
ENSG00000144331 ZNF385B -0.5378671 3
ENSG00000196433 ASMT -0.5322497 3
ENSG00000144893 MED12L -0.5311931 3
ENSG00000155961 RAB39B -0.5253969 3
ENSG00000116141 MARK1 -0.5059225 3
ENSG00000198010 DLGAP2 -0.4955545 3
ENSG00000155886 SLC24A2 -0.4942000 3
ENSG00000109158 GABRA4 -0.4937671 3
ENSG00000243232 PCDHAC2 -0.4783915 3
ENSG00000175161 CADM2 -0.4554469 3
ENSG00000157152 SYN2 -0.4517946 3
ENSG00000162631 NTNG1 -0.4511032 3
ENSG00000182901 RGS7 -0.4464033 3
ENSG00000133019 CHRM3 -0.4269528 3
ENSG00000156113 KCNMA1 -0.4056474 3
ENSG00000081189 MEF2C -0.3994354 3
ENSG00000114374 USP9Y -0.3530437 3
ENSG00000135312 HTR1B -0.3456140 3
ENSG00000174442 ZWILCH -0.3290704 3
ENSG00000136653 RASSF5 -0.3278436 3
ENSG00000152495 CAMK4 -0.3173632 3
ENSG00000060140 STYK1 -0.3142629 3
ENSG00000165355 FBXO33 -0.3044234 3
ENSG00000183117 CSMD1 -0.2927903 3
ENSG00000170289 CNGB3 -0.2731971 3
ENSG00000119125 GDA -0.2696689 3
ENSG00000139915 MDGA2 -0.2435673 3
ENSG00000042781 USH2A 0.2419483 3
ENSG00000152402 GUCY1A2 -0.2325757 3
ENSG00000159640 ACE -0.1962668 3
ENSG00000185344 ATP6V0A2 -0.1924963 3
ENSG00000150394 CDH8 -0.1866258 3
ENSG00000169083 AR 0.1691410 3
ENSG00000004468 CD38 -0.1689250 3
ENSG00000215018 COL28A1 -0.1475973 3
ENSG00000128513 POT1 -0.1415512 3
ENSG00000244588 RAD21L1 -0.1256962 3
ENSG00000165379 LRFN5 -0.1220706 3
ENSG00000100038 TOP3B -0.1030750 3
ENSG00000163873 GRIK3 0.0933052 3
ENSG00000128512 DOCK4 -0.0898793 3
ENSG00000134909 ARHGAP32 0.0825189 3
ENSG00000204632 HLA-G -0.0733823 3
ENSG00000187775 DNAH17 0.0712448 3
ENSG00000165246 NLGN4Y -0.0681445 3
ENSG00000181036 FCRL6 0.0606235 3
kable(table_data %>% filter(Module == top_modules[4] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[4]))
Top SFARI Genes for Module #84AD00
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000139613 SMARCC2 -0.4329730 1
ENSG00000171759 PAH -0.6896875 2
ENSG00000169918 OTUD7A -0.4706156 2
ENSG00000106290 TAF6 -0.4144602 2
ENSG00000196557 CACNA1H -0.4127420 2
ENSG00000133026 MYH10 -0.3262873 2
ENSG00000101489 CELF4 -0.3169738 2
ENSG00000180914 OXTR -0.1668994 2
ENSG00000135631 RAB11FIP5 -0.8340525 3
ENSG00000128594 LRRC4 -0.4715288 3
ENSG00000156110 ADK -0.4684323 3
ENSG00000130731 C16orf13 -0.4412635 3
ENSG00000117594 HSD11B1 -0.3659904 3
ENSG00000160862 AZGP1 -0.3630511 3
ENSG00000064687 ABCA7 -0.3146496 3
ENSG00000135127 CCDC64 -0.3057298 3
ENSG00000185115 NDNL2 -0.2931715 3
ENSG00000115421 PAPOLG -0.2389588 3
ENSG00000111615 KRR1 -0.2227080 3
ENSG00000100359 SGSM3 -0.2078293 3
ENSG00000101883 RHOXF1 -0.1923108 3
ENSG00000131653 TRAF7 -0.1326903 3
ENSG00000138650 PCDH10 -0.1102729 3
ENSG00000213923 CSNK1E 0.0323072 3

Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case (even the opposite may be true)

plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>% 
            group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>% 
            mutate(p=round(p/n*100,2)) 

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(this_row$hasSFARIscore==FALSE & this_row$p==100){
    new_row = this_row
    new_row$hasSFARIscore = TRUE
    new_row$p = 0
    plot_data = plot_data %>% rbind(new_row)
  }
}

plot_data = plot_data %>% filter(hasSFARIscore==TRUE)

ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
         xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
         theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)

Breaking the SFARI genes by score

scores = c(1,2,3,'None')

plot_data = dataset %>% group_by(Module, MTcor, gene.score) %>% summarise(n=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(N=n()), by='Module') %>% 
            mutate(p=round(n/N*100,2), gene.score = as.character(gene.score))

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(sum(plot_data$Module == this_row$Module)<7){
    missing_scores = which(! scores %in% plot_data$gene.score[plot_data$Module == this_row$Module])
    for(s in missing_scores){
      new_row = this_row
      new_row$gene.score = as.character(s)
      new_row$n = 0
      new_row$p = 0
      plot_data = plot_data %>% rbind(new_row) 
    }
  }
}

plot_data = plot_data %>% filter(gene.score != 'None')

plot_function = function(i){
  i = 2*i-1
  plot_list = list()
  for(j in 1:2){
    plot_list[[j]] = ggplotly(plot_data %>% filter(gene.score==scores[i+j-1]) %>% ggplot(aes(MTcor, p, size=n)) + 
                geom_smooth(color='gray', se=FALSE) +
                geom_point(color=plot_data$Module[plot_data$gene.score==scores[i+j-1]], alpha=0.5, aes(id=Module)) +
                geom_hline(yintercept=mean(plot_data$p[plot_data$gene.score==scores[i+j-1]]), color='gray') +
                xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
                theme_minimal() + theme(legend.position = 'none'))
  }
  p = subplot(plot_list, nrows=1) %>% layout(annotations = list(
    list(x = 0.2 , y = 1.05, text = paste0('SFARI score ', scores[i]), showarrow = F, xref='paper', yref='paper'),
    list(x = 0.8 , y = 1.05, text = paste0('SFARI score ', scores[i+1]), showarrow = F, xref='paper', yref='paper')))
  
  return(p)
}

plot_function(1)
plot_function(2)
rm(i, s, this_row, new_row, plot_function)




Module Eigengenes


Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.

In both cases, the Eigengenes separate the behaviour between autism and control samples very clearly!

plot_EGs = function(module){

  plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)

  p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
                    ggtitle(paste0('Module ', module, '  (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
  return(p)
}

p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
p3 = plot_EGs(top_modules[3])
p4 = plot_EGs(top_modules[4])

grid.arrange(p1, p2, p3, p4, nrow=2)

rm(plot_EGs, p1, p2, p3, p4)




Identifying important genes


Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance

*Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]], caption=paste0('Top 10 genes for module ', top_modules[1], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
Top 10 genes for module #73B000 (MTcor = 0.94)
ID external_gene_id MM GS gene.score importance
ENSG00000143384 MCL1 0.8500241 0.9078060 None 0.8789151
ENSG00000158615 PPP1R15B 0.8407717 0.8212537 None 0.8310127
ENSG00000196935 SRGAP1 0.7175838 0.9224538 None 0.8200188
ENSG00000161638 ITGA5 0.7550781 0.8597979 None 0.8074380
ENSG00000148841 ITPRIP 0.7913310 0.8161802 None 0.8037556
ENSG00000120278 PLEKHG1 0.7857882 0.8086459 None 0.7972171
ENSG00000133639 BTG1 0.7819379 0.8082183 None 0.7950781
ENSG00000101493 ZNF516 0.6925114 0.8811710 None 0.7868412
ENSG00000150457 LATS2 0.7097060 0.8499030 None 0.7798045
ENSG00000097007 ABL1 0.7612757 0.7941289 None 0.7777023
ENSG00000253731 PCDHGA6 0.7450788 0.8036535 None 0.7743662
ENSG00000106366 SERPINE1 0.7535342 0.7911667 3 0.7723505
ENSG00000198795 ZNF521 0.7862020 0.7397450 None 0.7629735
ENSG00000154640 BTG3 0.6921295 0.8306872 None 0.7614084
ENSG00000138166 DUSP5 0.7541208 0.7643020 None 0.7592114
ENSG00000072364 AFF4 0.7413482 0.7716602 None 0.7565042
ENSG00000204569 PPP1R10 0.7470986 0.7645752 None 0.7558369
ENSG00000135913 USP37 0.6855044 0.8254166 None 0.7554605
ENSG00000197329 PELI1 0.7612944 0.7481606 None 0.7547275
ENSG00000130222 GADD45G 0.6503873 0.8500145 None 0.7502009
kable(top_genes[[2]], caption=paste0('Top 10 genes for module ', top_modules[2], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
Top 10 genes for module #EE8045 (MTcor = 0.93)
ID external_gene_id MM GS gene.score importance
ENSG00000003402 CFLAR 0.8349806 0.8167723 None 0.8258764
ENSG00000138119 MYOF 0.8095979 0.8334501 None 0.8215240
ENSG00000162745 OLFML2B 0.7794224 0.8526613 None 0.8160419
ENSG00000089159 PXN 0.7551603 0.8687338 None 0.8119471
ENSG00000124782 RREB1 0.7602334 0.8444107 None 0.8023221
ENSG00000198185 ZNF334 0.8308122 0.7574061 None 0.7941092
ENSG00000168769 TET2 0.7660493 0.8170356 2 0.7915424
ENSG00000120690 ELF1 0.7858080 0.7870686 None 0.7864383
ENSG00000163629 PTPN13 0.7149171 0.8558172 Neuronal 0.7853671
ENSG00000183808 RBM12B 0.6787244 0.8906825 None 0.7847035
ENSG00000104870 FCGRT 0.6702519 0.8567333 None 0.7634926
ENSG00000171988 JMJD1C 0.7726856 0.7508001 3 0.7617428
ENSG00000117000 RLF 0.8221159 0.7009312 None 0.7615236
ENSG00000173110 HSPA6 0.7142783 0.8087644 None 0.7615214
ENSG00000180596 HIST1H2BC 0.7506019 0.7717162 None 0.7611591
ENSG00000102531 FNDC3A 0.7265045 0.7897408 None 0.7581227
ENSG00000110719 TCIRG1 0.6831133 0.8306871 None 0.7569002
ENSG00000130066 SAT1 0.7490607 0.7561901 None 0.7526254
ENSG00000065978 YBX1 0.6670100 0.8339125 None 0.7504612
ENSG00000073792 IGF2BP2 0.6578041 0.8387673 None 0.7482857
kable(top_genes[[3]], caption=paste0('Top 10 genes for module ', top_modules[3], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[3]),1],2),')'))
Top 10 genes for module #00B9E3 (MTcor = -0.93)
ID external_gene_id MM GS gene.score importance
ENSG00000050748 MAPK9 0.9018732 -0.9400140 Neuronal 0.9209436
ENSG00000108395 TRIM37 0.9037848 -0.9297931 None 0.9167889
ENSG00000138078 PREPL 0.9001507 -0.9038232 None 0.9019869
ENSG00000177432 NAP1L5 0.8683153 -0.9048326 None 0.8865739
ENSG00000155097 ATP6V1C1 0.8612597 -0.8966476 None 0.8789537
ENSG00000114573 ATP6V1A 0.8322001 -0.9172599 None 0.8747300
ENSG00000163577 EIF5A2 0.8346941 -0.9108048 None 0.8727495
ENSG00000128881 TTBK2 0.8697142 -0.8751661 None 0.8724401
ENSG00000171132 PRKCE 0.8338660 -0.9088992 None 0.8713826
ENSG00000111674 ENO2 0.8876355 -0.8482909 Neuronal 0.8679632
ENSG00000176490 DIRAS1 0.8331173 -0.8967040 None 0.8649106
ENSG00000131437 KIF3A 0.8801280 -0.8462276 Neuronal 0.8631778
ENSG00000196876 SCN8A 0.9339299 -0.7887782 1 0.8613540
ENSG00000125814 NAPB 0.8865175 -0.8311321 None 0.8588248
ENSG00000172348 RCAN2 0.7982100 -0.9149243 None 0.8565672
ENSG00000184368 MAP7D2 0.8249437 -0.8801442 None 0.8525440
ENSG00000162694 EXTL2 0.8007142 -0.8982366 None 0.8494754
ENSG00000130540 SULT4A1 0.8815880 -0.8113335 None 0.8464608
ENSG00000144285 SCN1A 0.8893987 -0.8000233 1 0.8447110
ENSG00000132639 SNAP25 0.8756003 -0.8102863 3 0.8429433
kable(top_genes[[4]], caption=paste0('Top 10 genes for module ', top_modules[4], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[4]),1],2),')'))
Top 10 genes for module #84AD00 (MTcor = -0.93)
ID external_gene_id MM GS gene.score importance
ENSG00000106683 LIMK1 0.8336101 -0.8623672 Neuronal 0.8479886
ENSG00000141576 RNF157 0.8343007 -0.8485893 None 0.8414450
ENSG00000127838 PNKD 0.8393553 -0.8333891 None 0.8363722
ENSG00000140854 KATNB1 0.8646680 -0.7907368 Neuronal 0.8277024
ENSG00000148334 PTGES2 0.8037964 -0.8316634 None 0.8177299
ENSG00000006432 MAP3K9 0.7465523 -0.8872405 None 0.8168964
ENSG00000130725 UBE2M 0.7814384 -0.8501315 Neuronal 0.8157850
ENSG00000105251 SHD 0.7661369 -0.8630235 None 0.8145802
ENSG00000139190 VAMP1 0.8258018 -0.8015636 Neuronal 0.8136827
ENSG00000168993 CPLX1 0.7956338 -0.8267869 Neuronal 0.8112104
ENSG00000183837 PNMA3 0.7999070 -0.8021580 None 0.8010325
ENSG00000180155 LYNX1 0.7934936 -0.8012133 None 0.7973535
ENSG00000124191 TOX2 0.7739545 -0.8098081 None 0.7918813
ENSG00000139637 C12orf10 0.7420227 -0.8338129 None 0.7879178
ENSG00000196972 LINC00087 0.8285660 -0.7454300 None 0.7869980
ENSG00000135631 RAB11FIP5 0.7285209 -0.8340525 3 0.7812867
ENSG00000174238 PITPNA 0.6804206 -0.8774836 None 0.7789521
ENSG00000106789 CORO2A 0.7260306 -0.8209005 None 0.7734656
ENSG00000196177 ACADSB 0.7340860 -0.8051786 None 0.7696323
ENSG00000127445 PIN1 0.7966748 -0.7363993 None 0.7665370
rm(create_table)
pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & 
                                  ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              theme_minimal() + ggtitle('Important genes identified through WGCNA')

Level of expression by Diagnosis for top genes, ordered by importance (defined above)

create_plot = function(i){
  
  plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>% 
              melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
              left_join(top_genes[[i]], by='ID') %>%
              left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
                        by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(importance))
  
  p = ggplotly(plot_data %>% mutate(external_gene_id=factor(external_gene_id, 
                                    levels=unique(plot_data$external_gene_id), ordered=T)) %>%
               ggplot(aes(external_gene_id, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
                      xlab(paste0('Top genes for module ', top_modules[i], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[i]][1],2), ')')) + ylab('Level of Expression') +
                      theme(axis.text.x = element_text(angle = 90, hjust = 1)))
  return(p)
  
}

create_plot(1)
create_plot(2)
create_plot(3)
create_plot(4)
rm(create_plot)




Enrichment Analysis


Using the package anRichment

file_name = './../Data/enrichmentAnalysis.RData'

if(file.exists('./../Data/enrichmentAnalysis.RData__________________________________________________________________________________________________________')){
  load(file_name)
} else {
  # Prepare dataset
  # Create dataset with top modules membership and removing the genes without an assigned module
  EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module)  %>%
               filter(genes_info$Module!='gray')
  
  # Assign Entrez Gene Id to each gene
  getinfo = c('ensembl_gene_id','entrezgene')
  mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
  biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
  
  EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')
  
  for(tm in top_modules){
    cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
               tm, ' don\'t have an Entrez Gene ID')) 
  }
  
  rm(getinfo, mart, biomart_output, tm)
  
  # Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf
  
  collectGarbage()
  
  # EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])
  
  # Prepare datasets
  GO_col = buildGOcollection(organism = 'human', verbose = 0)
  internal_col = internalCollection(organism = 'human')
  MillerAIBS_col = MillerAIBSCollection(organism = 'human')
  BrainDisease_col = BrainDiseaseCollection(organism = 'human')
  combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)
  
  # Print collections used
  cat('Using collections: ')
  knownGroups(combined_col, sortBy = 'size')
  
  # Perform Enrichment Analysis
  enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
                                  refCollection = combined_col, #useBackground = 'given', 
                                  threshold = 1e-4, thresholdType = 'Bonferroni',
                                  getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
  
  # Save enrichment results
  save(enrichment, file=file_name)
}
## Batch submitting query [==>----------------------------] 9% eta: 4s Batch
## submitting query [===>---------------------------] 12% eta: 5s Batch
## submitting query [====>--------------------------] 15% eta: 5s Batch
## submitting query [=====>-------------------------] 18% eta: 5s Batch
## submitting query [======>------------------------] 21% eta: 5s Batch
## submitting query [=======>-----------------------] 24% eta: 4s Batch
## submitting query [=======>-----------------------] 27% eta: 5s Batch
## submitting query [========>----------------------] 30% eta: 4s Batch
## submitting query [=========>---------------------] 33% eta: 4s Batch
## submitting query [==========>--------------------] 36% eta: 4s Batch
## submitting query [===========>-------------------] 39% eta: 4s Batch
## submitting query [============>------------------] 42% eta: 4s Batch
## submitting query [=============>-----------------] 45% eta: 4s Batch
## submitting query [==============>----------------] 48% eta: 3s Batch
## submitting query [===============>---------------] 52% eta: 3s Batch
## submitting query [================>--------------] 55% eta: 3s Batch
## submitting query [=================>-------------] 58% eta: 3s Batch
## submitting query [==================>------------] 61% eta: 3s Batch
## submitting query [===================>-----------] 64% eta: 2s Batch
## submitting query [====================>----------] 67% eta: 2s Batch
## submitting query [=====================>---------] 70% eta: 2s Batch
## submitting query [======================>--------] 73% eta: 2s Batch
## submitting query [======================>--------] 76% eta: 2s Batch
## submitting query [=======================>-------] 79% eta: 1s Batch
## submitting query [========================>------] 82% eta: 1s Batch
## submitting query [=========================>-----] 85% eta: 1s Batch
## submitting query [==========================>----] 88% eta: 1s Batch
## submitting query [===========================>---] 91% eta: 1s Batch
## submitting query [============================>--] 94% eta: 0s Batch submitting
## query [=============================>-] 97% eta: 0s Batch submitting query
## [===============================] 100% eta: 0s
## 
## 27 genes from top module #73B000 don't have an Entrez Gene ID
## 29 genes from top module #EE8045 don't have an Entrez Gene ID
## 27 genes from top module #00B9E3 don't have an Entrez Gene ID
## 25 genes from top module #84AD00 don't have an Entrez Gene ID
## Loading required package: org.Hs.eg.db
## 
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## Using collections:  enrichmentAnalysis: preparing data..
##   ..working on label set 1 ..


Results


kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
Enriched terms for module #73B000 (MTcor = 0.9445)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAM:003078 Temporal Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.0678488 0.0000609 2.581215 1423 158 37
JAMiller.AIBS.000169 Cerebellum-enriched genes in midfetal human brain JA Miller at AIBS|Brain|Prenatal brain|Brain region markers 0.1676492 0.0001374 2.755622 1423 124 31
GO:0072358 cardiovascular system development GO|GO.BP 0.3613496 0.0002744 1.688783 1423 607 93
GO:0072359 circulatory system development GO|GO.BP 0.4257766 0.0003168 1.548309 1423 897 126
GO:0001944 vasculature development GO|GO.BP 0.7108773 0.0005035 1.674535 1423 599 91
JAM:002886 Hypothalamus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.0000000 0.0019483 2.423384 1423 141 31
JAMiller.AIBS.000138 VZ/SZ/IZ enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 1.0000000 0.0013896 1.892415 1423 332 57
GO:0001568 blood vessel development GO|GO.BP 1.0000000 0.0013863 1.657227 1423 572 86
JAMiller.AIBS.000502 Genes bound by SUZ12 in mouse MESC from PMID 16625203 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.0000000 0.0014159 1.546771 1423 791 111
JAMiller.AIBS.000208 RegionalWGCNA midfetal M38 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 1.0000000 0.0016658 1.503067 1423 902 123
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, enrichmentRatio),
      caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
Enriched terms for module #EE8045 (MTcor = 0.9309)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000176 CortexWGCNA midfetal M6 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.0013071 0.0000016 1.928947 848 863 90
GO:0010468 regulation of gene expression GO|GO.BP 0.0318274 0.0000311 1.345662 848 3615 263
GO:1903506 regulation of nucleic acid-templated transcription GO|GO.BP 0.0330396 0.0000320 1.407197 848 2826 215
GO:0010556 regulation of macromolecule biosynthetic process GO|GO.BP 0.0358070 0.0000345 1.369898 848 3254 241
GO:0051252 regulation of RNA metabolic process GO|GO.BP 0.0389042 0.0000371 1.387082 848 3027 227
GO:2001141 regulation of RNA biosynthetic process GO|GO.BP 0.0429812 0.0000406 1.402730 848 2835 215
GO:0006355 regulation of transcription, DNA-templated GO|GO.BP 0.0610153 0.0000554 1.404287 848 2766 210
GO:0019219 regulation of nucleobase-containing compound metabolic process GO|GO.BP 0.0715698 0.0000637 1.358053 848 3296 242
GO:0097659 nucleic acid-templated transcription GO|GO.BP 0.1089999 0.0000931 1.377000 848 2982 222
GO:2000112 regulation of cellular macromolecule biosynthetic process GO|GO.BP 0.1185138 0.0001002 1.363767 848 3133 231
kable(enrichment$enrichmentTable %>% filter(class==top_modules[3]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[3], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[3]][1],4), ')'))
Enriched terms for module #00B9E3 (MTcor = -0.9261)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000052 CortexWGCNA 15-21 post-conception weeks C26 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.00e+00 0e+00 2.708083 1695 721 211
JAMiller.AIBS.000142 Highest in CP of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.194883 1695 1210 287
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 1.524703 1695 4036 665
JAM:002769 downAD_mitochondrion JAM|BrainLists|BrainLists.Blalock_AD 0.00e+00 0e+00 3.364977 1695 264 96
JAMiller.AIBS.000150 Highest in CP of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 1.878513 1695 1266 257
JAM:003016 downAD_synapticTransmission JAM|BrainLists|BrainLists.Blalock_AD 0.00e+00 0e+00 5.152621 1695 88 49
JAMiller.AIBS.000155 Lowest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 1.723944 1695 1664 310
JAMiller.AIBS.000123 HippocampusWGCNA turquoise DGenriched upAge JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.00e+00 0e+00 1.921531 1695 1098 228
JAMiller.AIBS.000570 WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 2.481758 1695 481 129
JAM:003072 Tail of Caudate Nucleus_IN_Striatum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.506055 1695 161 61
GO:0044456 synapse part GO|GO.CC 0.00e+00 0e+00 1.967674 1695 823 175
GO:0045202 synapse GO|GO.CC 0.00e+00 0e+00 1.834783 1695 1044 207
GO:0097060 synaptic membrane GO|GO.CC 0.00e+00 0e+00 2.418297 1695 375 98
JAMiller.AIBS.000141 CP enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.112789 1695 565 129
GO:0097458 neuron part GO|GO.CC 0.00e+00 0e+00 1.651046 1695 1418 253
JAM:002764 downAging_mitochondria_synapse JAM|BrainLists|BrainLists.Lu_Aging 0.00e+00 0e+00 2.349013 1695 390 99
JAM:002751 Basal Pons JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.196728 1695 165 57
JAM:002744 Autism_differential_expression_across_at_least_one_comparison JAM|BrainLists|BrainLists.Voineagu 0.00e+00 0e+00 1.855589 1695 763 153
JAM:003054 subiculum_IN_Hippocampal Formation JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.065639 1695 163 54
JAMiller.AIBS.000005 CPi markers at 21 post-conception weeks JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Postmitotic brain 1.00e-07 0e+00 2.395777 1695 309 80
JAM:002805 Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 2.00e-07 0e+00 2.988769 1695 161 52
JAM:002739 arcuate nucleus of medulla_IN_Myelencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 2.00e-07 0e+00 2.936799 1695 167 53
JAM:002920 Lateral Nucleus_IN_Amygdala JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 5.00e-07 0e+00 2.916314 1695 165 52
GO:0099536 synaptic signaling GO|GO.BP 5.00e-07 0e+00 1.949884 1695 560 118
GO:0034220 ion transmembrane transport GO|GO.BP 7.00e-07 0e+00 1.720897 1695 898 167
GO:0098793 presynapse GO|GO.CC 1.00e-06 0e+00 2.108154 1695 417 95
JAMiller.AIBS.000095 Cortical PNOC neurons JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 2.50e-06 0e+00 1.282364 1695 3940 546
GO:0045211 postsynaptic membrane GO|GO.CC 3.10e-06 0e+00 2.346005 1695 284 72
GO:0099537 trans-synaptic signaling GO|GO.BP 3.40e-06 0e+00 1.917431 1695 555 115
JAM:002882 Hippocampal Formation JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 4.00e-06 0e+00 2.809155 1695 168 51
GO:0007268 chemical synaptic transmission GO|GO.BP 4.10e-06 0e+00 1.918037 1695 550 114
GO:0098916 anterograde trans-synaptic signaling GO|GO.BP 4.10e-06 0e+00 1.918037 1695 550 114
JAMiller.AIBS.000463 Genes bound by SMAD4 in HUMAN A2780 from PMID 21799915 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 9.70e-06 0e+00 1.415427 1695 2079 318
GO:0043005 neuron projection GO|GO.CC 1.07e-05 0e+00 1.625648 1695 1036 182
GO:0030424 axon GO|GO.CC 1.10e-05 0e+00 1.942358 1695 505 106
GO:0022836 gated channel activity GO|GO.MF 2.52e-05 0e+00 2.420796 1695 237 62
JAMiller.AIBS.000042 CortexWGCNA 15-21 post-conception weeks C16 SPenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 2.81e-05 0e+00 2.570469 1695 198 55
JAM:002824 Dentate Nucleus_IN_Cerebellar Nucleus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 5.67e-05 1e-07 2.725012 1695 163 48
GO:0034702 ion channel complex GO|GO.CC 5.72e-05 1e-07 2.437355 1695 224 59
GO:0005216 ion channel activity GO|GO.MF 6.05e-05 1e-07 2.256199 1695 283 69
GO:0022839 ion gated channel activity GO|GO.MF 6.28e-05 1e-07 2.391843 1695 236 61
GO:1902495 transmembrane transporter complex GO|GO.CC 7.84e-05 1e-07 2.361023 1695 243 62
GO:0099240 intrinsic component of synaptic membrane GO|GO.CC 8.40e-05 1e-07 2.764088 1695 154 46
kable(enrichment$enrichmentTable %>% filter(class==top_modules[4]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[4], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[4]][1],4), ')'))
Enriched terms for module #84AD00 (MTcor = -0.9309)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000124 HippocampusWGCNA yellow JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.0000395 0.0000001 2.175436 852 677 80
GO:0005759 mitochondrial matrix GO|GO.CC 0.7763462 0.0005425 2.107644 852 428 49
JAM:002985 Parietal Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.0000000 0.0130730 3.012484 852 110 18
GO:0098798 mitochondrial protein complex GO|GO.CC 1.0000000 0.0016657 2.503063 852 228 31
JAMiller.AIBS.000206 RegionalWGCNA midfetal M36 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 1.0000000 0.0018009 2.492133 852 229 31
GO:0005743 mitochondrial inner membrane GO|GO.CC 1.0000000 0.0139251 1.914949 852 423 44
JAMiller.AIBS.000048 CortexWGCNA 15-21 post-conception weeks C22 CPenriched enrichedForAutismGenes JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 1.0000000 0.0009229 1.880390 852 607 62
GO:0044429 mitochondrial part GO|GO.CC 1.0000000 0.0016518 1.668186 852 927 84
JAMiller.AIBS.000044 CortexWGCNA 15-21 post-conception weeks C18 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 1.0000000 0.0167403 1.666610 852 718 65
GO:0005739 mitochondrion GO|GO.CC 1.0000000 0.0089002 1.473823 852 1399 112




Enrichment Analysis for Modules with High Percentage of SFARI Genes


Option 1: Select the modules with the highest percentage of SFARI Genes

# Prepare dataset
perc_SFARI_genes = dataset %>% mutate('hasSFARIscore' = !gene.score %in% c('None', 'Neuronal')) %>% 
                   group_by(Module, MTcor, hasSFARIscore) %>% summarise(s=n()) %>% 
                   left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>% 
                   mutate(p=round(s/n*100,2)) %>% filter(hasSFARIscore & Module != 'gray') %>% arrange(desc(p))

perc_SFARI_genes %>% ggplot(aes(reorder(Module, -p), p)) + geom_bar(stat='identity', fill = perc_SFARI_genes$Module) + 
                     xlab('Module') + ylab('% of SFARI Genes') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

keep_modules = perc_SFARI_genes$Module[1:2]

cat(paste0('Keeping top 2 modules: ', paste(keep_modules, collapse = ', ')))
## Keeping top 2 modules: #8195FF, #B79F00

Option 2: Assume independence between SFARI Genes and modules and calculate the probability of obtaining a proportion at least as big as the one found in each cluster given its size


Notation:


If we interpret the number of genes (\(n\)) in a cluster as \(n\) random draws without replacement from a finite population of size \(N\), and the number of SFARI genes in the cluster (\(k\)) as \(k\) successes in those \(n\) draws, where we know that \(N\) contains exactly \(K\) successes, then we can use the hypergeometric distribution to calculate the statistical significance of having drawn \(k\) successes out of \(n\) draws, and use this value to select the clusters with the highest statistical significance

We want to select the modules with the smallest probability, since this means that the probability of obtaining at least this number of SFARI genes by chance is small (the )

Note: Instead of calculating the probability we are using the log probability to differentiate better between the smallest probabilities

N = sum(perc_SFARI_genes$n)
S = sum(perc_SFARI_genes$s)

calc_prob = function(row){
  s = row[4] %>% as.numeric
  n = row[5] %>% as.numeric
  prob = phyper(s,S,N-S,n, log.p = TRUE, lower.tail=FALSE)
  return(prob)
}

perc_SFARI_genes$prob = apply(perc_SFARI_genes, 1, function(x) calc_prob(x))
perc_SFARI_genes = perc_SFARI_genes %>% arrange(prob)

perc_SFARI_genes %>% ggplot(aes(MTcor, prob, size=n)) + geom_point(color=perc_SFARI_genes$Module, alpha=0.5) + geom_smooth(color='#cccccc', se=FALSE) + 
                     xlab('Module-Trait Correlation') + ylab('Hypergeometric (log)Probability') + theme_minimal() + theme(legend.position = 'none')

We are no longer selecting the modules with the highest percentage of SFARI genes, but instead we are pondering this by the size of the module

In the end we are keeping cluster #B79F00 as a top cluster but we are replacing #8195FF for #FE61CF, which has a lower percentage of SFARI genes but is a bigger cluster

perc_SFARI_genes %>% ggplot(aes(p, prob, size=n)) + geom_point(color=perc_SFARI_genes$Module, alpha=0.5) + geom_smooth(color='#cccccc', se=FALSE) + 
                     xlab('% of SFARI Genes') + ylab('Hypergeometric (log)Probability') + theme_minimal() + theme(legend.position = 'none')

perc_SFARI_genes %>% ggplot(aes(reorder(Module, prob), prob)) + geom_bar(stat='identity', fill = perc_SFARI_genes$Module) + 
                     xlab('Module') + ylab('(Hypergeometric (log)Probability') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

keep_modules = perc_SFARI_genes$Module[1:2]

cat(paste0('Keeping top 2 modules: ', paste(keep_modules, collapse = ', ')))
## Keeping top 2 modules: #B79F00, #FE61CF
rm(N,S,calc_prob)


Results


kable(enrichment$enrichmentTable %>% filter(class==keep_modules[1]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', keep_modules[1], ' (SFARI Genes = ',
                       round(perc_SFARI_genes$p[perc_SFARI_genes$Module==keep_modules[1]][1],4), '%)'))
Enriched terms for module #B79F00 (SFARI Genes = 16.09%)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.0008823 0.0000011 1.850606 168 4036 80
GO:0051252 regulation of RNA metabolic process GO|GO.BP 0.2041583 0.0001640 1.881450 168 3027 61
GO:0090304 nucleic acid metabolic process GO|GO.BP 0.2908464 0.0002255 1.670297 168 4304 77
GO:2000112 regulation of cellular macromolecule biosynthetic process GO|GO.BP 0.2970930 0.0002299 1.847594 168 3133 62
GO:0019219 regulation of nucleobase-containing compound metabolic process GO|GO.BP 0.3413090 0.0002605 1.812876 168 3296 64
GO:0010556 regulation of macromolecule biosynthetic process GO|GO.BP 1.0000000 0.0007780 1.778891 168 3254 62
GO:0031326 regulation of cellular biosynthetic process GO|GO.BP 1.0000000 0.0007603 1.754842 168 3405 64
GO:0009889 regulation of biosynthetic process GO|GO.BP 1.0000000 0.0006901 1.748373 168 3471 65
GO:0016070 RNA metabolic process GO|GO.BP 1.0000000 0.0010016 1.683317 168 3827 69
GO:0005634 nucleus GO|GO.CC 1.0000000 0.0013611 1.475657 168 5884 93
kable(enrichment$enrichmentTable %>% filter(class==keep_modules[2]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, enrichmentRatio),
      caption = paste0('Enriched terms for module ', keep_modules[2], ' (SFARI Genes = ',
                       round(perc_SFARI_genes$p[perc_SFARI_genes$Module==keep_modules[2]][1],4), '%)'))
Enriched terms for module #FE61CF (SFARI Genes = 12.06%)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000188 CortexWGCNA midfetal M18 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.0000000 0.0000000 7.058003 313 426 60
JAMiller.AIBS.000195 RegionalWGCNA midfetal M25 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.0000000 0.0000000 8.176139 313 190 31
JAMiller.AIBS.000048 CortexWGCNA 15-21 post-conception weeks C22 CPenriched enrichedForAutismGenes JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.0000000 0.0000000 3.962714 313 607 48
JAMiller.AIBS.000044 CortexWGCNA 15-21 post-conception weeks C18 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.0000097 0.0000000 3.140713 313 718 45
JAMiller.AIBS.000268 Genes bound by EGR1 in HUMAN ERYTHROLEUKEMIA from PMID 20690147 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.0001385 0.0000002 1.556912 313 4828 150
JAMiller.AIBS.000155 Lowest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.0004462 0.0000006 2.138185 313 1664 71
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.0006891 0.0000009 1.614107 313 4036 130
JAMiller.AIBS.000257 Genes bound by DMRT1 in MOUSE TESTES from PMID 23473982 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.0747682 0.0000662 1.969328 313 1654 65
JAMiller.AIBS.000245 Genes bound by CREB1 in RAT HIPPOCAMPUS from PMID 23762244 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.1714689 0.0001401 1.823144 313 2034 74
JAMiller.AIBS.000124 HippocampusWGCNA yellow JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.3129563 0.0002413 2.590714 313 677 35



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.8.2                      
##  [2] BrainDiseaseCollection_1.00             
##  [3] anRichment_1.01-2                       
##  [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.4.7
##  [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 
##  [6] GenomicFeatures_1.36.4                  
##  [7] GenomicRanges_1.36.1                    
##  [8] GenomeInfoDb_1.20.0                     
##  [9] anRichmentMethods_0.90-1                
## [10] WGCNA_1.69                              
## [11] fastcluster_1.1.25                      
## [12] dynamicTreeCut_1.63-1                   
## [13] GO.db_3.8.2                             
## [14] AnnotationDbi_1.46.1                    
## [15] IRanges_2.18.3                          
## [16] S4Vectors_0.22.1                        
## [17] Biobase_2.44.0                          
## [18] BiocGenerics_0.30.0                     
## [19] biomaRt_2.40.5                          
## [20] knitr_1.28                              
## [21] doParallel_1.0.15                       
## [22] iterators_1.0.12                        
## [23] foreach_1.5.0                           
## [24] polycor_0.7-10                          
## [25] expss_0.10.2                            
## [26] GGally_1.5.0                            
## [27] gridExtra_2.3                           
## [28] viridis_0.5.1                           
## [29] viridisLite_0.3.0                       
## [30] RColorBrewer_1.1-2                      
## [31] dendextend_1.13.4                       
## [32] plotly_4.9.2                            
## [33] glue_1.3.2                              
## [34] reshape2_1.4.3                          
## [35] forcats_0.5.0                           
## [36] stringr_1.4.0                           
## [37] dplyr_0.8.5                             
## [38] purrr_0.3.3                             
## [39] readr_1.3.1                             
## [40] tidyr_1.0.2                             
## [41] tibble_3.0.0                            
## [42] ggplot2_3.3.0                           
## [43] tidyverse_1.3.0                         
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.4-0                 plyr_1.8.6                 
##   [5] lazyeval_0.2.2              splines_3.6.3              
##   [7] crosstalk_1.1.0.1           BiocParallel_1.18.1        
##   [9] digest_0.6.25               htmltools_0.4.0            
##  [11] fansi_0.4.1                 magrittr_1.5               
##  [13] checkmate_2.0.0             memoise_1.1.0              
##  [15] cluster_2.1.0               annotate_1.62.0            
##  [17] Biostrings_2.52.0           modelr_0.1.6               
##  [19] matrixStats_0.56.0          prettyunits_1.1.1          
##  [21] jpeg_0.1-8.1                colorspace_1.4-1           
##  [23] blob_1.2.1                  rvest_0.3.5                
##  [25] haven_2.2.0                 xfun_0.12                  
##  [27] crayon_1.3.4                RCurl_1.98-1.1             
##  [29] jsonlite_1.6.1              genefilter_1.66.0          
##  [31] impute_1.58.0               survival_3.1-12            
##  [33] gtable_0.3.0                zlibbioc_1.30.0            
##  [35] XVector_0.24.0              DelayedArray_0.10.0        
##  [37] scales_1.1.0                mvtnorm_1.1-0              
##  [39] DBI_1.1.0                   Rcpp_1.0.4                 
##  [41] xtable_1.8-4                progress_1.2.2             
##  [43] htmlTable_1.13.3            foreign_0.8-76             
##  [45] bit_1.1-15.2                preprocessCore_1.46.0      
##  [47] Formula_1.2-3               htmlwidgets_1.5.1          
##  [49] httr_1.4.1                  acepack_1.4.1              
##  [51] ellipsis_0.3.0              farver_2.0.3               
##  [53] pkgconfig_2.0.3             reshape_0.8.8              
##  [55] XML_3.99-0.3                nnet_7.3-14                
##  [57] dbplyr_1.4.2                locfit_1.5-9.4             
##  [59] labeling_0.3                tidyselect_1.0.0           
##  [61] rlang_0.4.5                 munsell_0.5.0              
##  [63] cellranger_1.1.0            tools_3.6.3                
##  [65] cli_2.0.2                   generics_0.0.2             
##  [67] RSQLite_2.2.0               broom_0.5.5                
##  [69] evaluate_0.14               yaml_2.2.1                 
##  [71] bit64_0.9-7                 fs_1.4.0                   
##  [73] nlme_3.1-147                xml2_1.2.5                 
##  [75] compiler_3.6.3              rstudioapi_0.11            
##  [77] curl_4.3                    png_0.1-7                  
##  [79] reprex_0.3.0                geneplotter_1.62.0         
##  [81] stringi_1.4.6               highr_0.8                  
##  [83] lattice_0.20-41             Matrix_1.2-18              
##  [85] vctrs_0.2.4                 pillar_1.4.3               
##  [87] lifecycle_0.2.0             data.table_1.12.8          
##  [89] bitops_1.0-6                rtracklayer_1.44.4         
##  [91] R6_2.4.1                    latticeExtra_0.6-29        
##  [93] codetools_0.2-16            assertthat_0.2.1           
##  [95] SummarizedExperiment_1.14.1 DESeq2_1.24.0              
##  [97] withr_2.1.2                 GenomicAlignments_1.20.1   
##  [99] Rsamtools_2.0.3             GenomeInfoDbData_1.2.1     
## [101] mgcv_1.8-31                 hms_0.5.3                  
## [103] grid_3.6.3                  rpart_4.1-15               
## [105] rmarkdown_2.1               lubridate_1.7.4            
## [107] base64enc_0.1-3